---
title: "-- Part3 -- Losing Perception -- BTW 2021 RTR-ML Paper"
output:
  html_document:
    df_print: paged
  word_document: default
  html_notebook: default
---

Load Data Subset and Libraries

```{r}
library(randomForestSRC)
library(dplyr)
library(magrittr)
library(tree)
library(ggplot2)
```

```{r}
# Project directory on the home PC.
setwd("E:/Fortis/Workspace/ML DOM/BTW 2021")
# Project directory on the laptop (enable instead of the line above).
# setwd("C:/Users/fe300/Desktop/Workspace/ML DOM/BTW 2021")

# Large scipen penalty so numbers are printed in fixed rather than scientific
# notation.
options(scipen = 999)

## Load the saved workspaces from the project directory.
load("dat.full.RData")
load("dat.stream44.RData")

## Activate the data set to analyse (choose one).
# NOTE(review): dat.full and dat are presumably the objects restored by the
# two load() calls above — confirm.
RTS2021 <- dat.full
# RTS2021 <- dat
```

```{r}
## Data set modifications:
# - recode missing values as the answer label "keine Angabe"
# - convert the pre- and post-election survey columns (20:89) to factors
mod <- RTS2021[, 20:89]
mod[is.na(mod)] <- "keine Angabe"
RTS2021[, 20:89] <- lapply(mod, factor)

# Drop participants without a post-debate winner answer, then rebuild the
# factor so that the no-longer-used level is removed.
keep_rows <- RTS2021$nach.sieger != "keine Angabe"
RTS2021 <- RTS2021[keep_rows, ]
RTS2021$nach.sieger <- factor(RTS2021$nach.sieger)
```


# Section 2: Switching direction away from the respective candidates

## Predicting which participants switch their winning preference away from a candidate + determining the variables that have the greatest influence on this



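The variables "victory.change.AB", "victory.change.AL" and "victory.change.OS" indicate this change of preference (1 = the participant switched away from the respective candidate).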

```{r, eval=TRUE}

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
## Outcome flags: did a participant move the expected debate winner AWAY from
## a candidate between the pre- and the post-debate survey?

# Build one switch-away flag.
#   before      pre-debate answers (vor.sieger)
#   after       post-debate answers (nach.sieger)
#   candidate   label of the candidate expected to win before the debate
#   switched_to post-debate labels that count as a switch away
# Returns a factor that is "1" where `before` equals `candidate` and `after`
# is one of `switched_to`, and "0" for every other row. Rows with missing
# answers never match and therefore end up as "0" as well.
flag_switch_away <- function(before, after, candidate, switched_to) {
  switched <- before %in% candidate & after %in% switched_to
  as.factor(as.numeric(switched))
}

RTS2021b <- RTS2021

# The three flags are appended in this order (AB, AL, OS); the models further
# down address them by column position, so the order must not change.

## change away from Baerbock
RTS2021b$victory.change.AB <- flag_switch_away(
  RTS2021b$vor.sieger,
  RTS2021b$nach.sieger,
  candidate = "Annalena Baerbock",
  switched_to = c("Armin Laschet", "Olaf Scholz", "Unentschieden")
)

# RTS2021b.test <- select(RTS2021b, vor.sieger, nach.sieger, victory.change.AB)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
## change away from Laschet
RTS2021b$victory.change.AL <- flag_switch_away(
  RTS2021b$vor.sieger,
  RTS2021b$nach.sieger,
  candidate = "Armin Laschet",
  switched_to = c("Annalena Baerbock", "Olaf Scholz", "Unentschieden")
)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
## change away from Scholz
RTS2021b$victory.change.OS <- flag_switch_away(
  RTS2021b$vor.sieger,
  RTS2021b$nach.sieger,
  candidate = "Olaf Scholz",
  switched_to = c("Annalena Baerbock", "Armin Laschet", "Unentschieden")
)

# Small frame for eyeballing the Scholz flag against the raw answers.
RTS2021b.test <- select(RTS2021b, vor.sieger, nach.sieger, victory.change.OS)

```


### Random Forest Models


```{r, eval=TRUE}
## AB: random forests predicting a switch away from Baerbock
# NOTE(review): the positional index 383 is expected to be victory.change.AB
# in RTS2021b — confirm against the column layout.

## (a) pre-debate survey data only
rf.2021.vchange.AB.a <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021b[, c(20:42, 57:60, 383)],
  ntree = 500,
  forest = TRUE
)

## (b) RTS measurements + vor.sieger only
rf.2021.vchange.AB.b <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021b[, c(35, 90:382, 383)],
  ntree = 500,
  forest = TRUE
)

## (c) both combined: raw survey data + RTS (no nach.kanzler included)
rf.2021.vchange.AB.c <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021b[, c(20:42, 57:60, 90:382, 383)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)

```


```{r}
## AL: random forests predicting a switch away from Laschet
# NOTE(review): the positional index 384 is expected to be victory.change.AL
# in RTS2021b — confirm against the column layout.

## (a) pre-debate survey data only
rf.2021.vchange.AL.a <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021b[, c(20:42, 57:60, 384)],
  ntree = 500,
  forest = TRUE
)

## (b) RTS measurements + vor.sieger only
rf.2021.vchange.AL.b <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021b[, c(35, 90:382, 384)],
  ntree = 500,
  forest = TRUE
)

## (c) both combined: raw survey data + RTS (no nach.kanzler included)
rf.2021.vchange.AL.c <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021b[, c(20:42, 57:60, 90:382, 384)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)


```


```{r}
## OS
## Nur mit Vorbefragungsdaten
rf.2021.vchange.OS.a <- rfsrc(victory.change.OS ~., data=RTS2021b[,c(20:42,57:60,385)], ntree = 500, forest=TRUE)

## Nur mit RTS Messung + vor.sieger
rf.2021.vchange.OS.b <- rfsrc(victory.change.OS ~., data=RTS2021b[,c(35,90:382,385)], ntree = 500, forest=TRUE)

## Beides kombiniert: Urdaten + RTS (kein nach.kanzler includiert)
rf.2021.vchange.OS.c <- rfsrc(victory.change.OS ~., data=RTS2021b[,c(20:42,57:60,90:382,385)], ntree = 1250, nsplit=30, mtry=250, forest=TRUE)


```


Ergebnisse anzeigen

```{r, eval=TRUE}
## Results: Baerbock forest fitted on the pre-debate survey data only
rf.2021.vchange.AB.a

```

```{r}
## Results: Laschet forest fitted on the pre-debate survey data only
rf.2021.vchange.AL.a
```

```{r}
## Results: Scholz forest fitted on the pre-debate survey data only
rf.2021.vchange.OS.a

```


####




```{r, eval=TRUE}
## Results: Baerbock forest fitted on the RTS measurements + vor.sieger
rf.2021.vchange.AB.b

```

```{r, eval=TRUE}
## Results: Laschet forest fitted on the RTS measurements + vor.sieger
rf.2021.vchange.AL.b

```

```{r, eval=TRUE}
## Results: Scholz forest fitted on the RTS measurements + vor.sieger
rf.2021.vchange.OS.b

```


###

```{r, eval=TRUE}
## Results: Baerbock forest fitted on the combination raw survey data + RTS
rf.2021.vchange.AB.c

```

```{r, eval=TRUE}
## Results: Laschet forest fitted on the combination raw survey data + RTS
rf.2021.vchange.AL.c

```


```{r, eval=TRUE}
## Results: Scholz forest fitted on the combination raw survey data + RTS
rf.2021.vchange.OS.c

# ROC curve for the combined Scholz forest.
# NOTE(review): which.outcome=2 presumably selects the second class level
# ("1" = switched away) — confirm against the ggRandomForests documentation.
gg_roc_OS <- ggRandomForests::gg_roc(rf.2021.vchange.OS.c, which.outcome=2)
plot(gg_roc_OS)
```


## Vorhersage mit dem kombinierten Modell

```{r, eval=TRUE}
# nur für externe daten
## prediction <- predict.rfsrc(rf.2021.kchange.a, Data[,c(1:12,14:108)])

# new dataframe (backup)
#rf.2021.kchange.c$xvar -> RTS2021.kchange
#cbind(RTS2021.kchange[,6], rf.2021.kchange.c$yvar, rf.2021.kchange.c$predicted, rf.2021.kchange.c$predicted.oob) -> RTS2021b.predictions


#colnames(RTS2021b.predictions) <- c("vor.Kanzler", "kanzler.change", "prediction", "prediction.OOB")

#head(RTS2021b.predictions, n=30)

```

## Wichtigkeit der einzelnen Variablen für die Vorhersage

```{r, eval=FALSE}
## Selected variables only (kept for reference)
# vimp <- vimp(data, xvar.names = c("V1","V2","V3","V4","V5", "vor.kanzler"))

# All predictors: variable importance for the Baerbock forest fitted on the
# RTS measurements + vor.sieger (model "b").
# NOTE(review): the Scholz chunk below uses the combined model "c" instead —
# confirm that the difference is intended.
vimp.change.AB <- vimp(rf.2021.vchange.AB.b)

```

```{r, eval=FALSE}
# Variable importance for the Laschet forest fitted on the RTS measurements +
# vor.sieger (model "b").
vimp.change.AL <- vimp(rf.2021.vchange.AL.b)
```

```{r, eval=TRUE}
# Variable importance for the combined Scholz forest (model "c").
vimp.change.OS <- vimp(rf.2021.vchange.OS.c)
```


```{r, eval=FALSE}
# Importance values of the Baerbock forest as a data frame.
vimp.change.extra <- vimp.change.AB[["importance"]]
vimp.change.extra <- as.data.frame(vimp.change.extra)
# knitr::kable(vimp.change$importance)

# Table ordered by the `all` column, largest value first.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra$all, decreasing = TRUE), ]
)

# Second copy of the importance values, built with data.frame().
change.importance.data <- vimp.change.AB$importance
change.importance.data <- data.frame(change.importance.data)

```

```{r, eval=FALSE}
# Importance values of the Laschet forest as a data frame.
vimp.change.extra <- vimp.change.AL[["importance"]]
vimp.change.extra <- as.data.frame(vimp.change.extra)
# knitr::kable(vimp.change$importance)

# Table ordered by the `all` column, largest value first.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra$all, decreasing = TRUE), ]
)

# Second copy of the importance values, built with data.frame().
change.importance.data <- vimp.change.AL$importance
change.importance.data <- data.frame(change.importance.data)

```


```{r, eval=TRUE}
# Importance values of the Scholz forest as a data frame.
vimp.change.extra <- vimp.change.OS[["importance"]]
vimp.change.extra <- as.data.frame(vimp.change.extra)
# knitr::kable(vimp.change$importance)

# Table ordered by the column named `1`, largest value first.
# NOTE(review): `1` is presumably the importance for the class "switched
# away" — confirm.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra$`1`, decreasing = TRUE), ]
)
# knitr::kable(vimp.change.extra[order(vimp.change.extra$all, decreasing=TRUE),])


# Second copy of the importance values, built with data.frame().
change.importance.data <- vimp.change.OS$importance
change.importance.data <- data.frame(change.importance.data)

```

Für die Vorhersage des Wechsels der Siegerpräferenz waren besonders die oben aufgelisteten Variablen wichtig.  


## Decision Tree Visualisierung

```{r, eval=TRUE}
# library(tree)
# kanzler.change.tree <- RTS2021b[,c(25:37,43:50,163:808)]
# na.omit(kanzler.change.tree) -> kanzler.change.tree

# shift.to.merkel = ifelse(kanzler.change.tree$kanzler.change>=0.5, "Yes", "No")
# shift.to.schulz = ifelse(kanzler.change.tree$kanzler.change<=(-0.5), "Yes", "No")


# kanzler.change.tree.b = data.frame(kanzler.change.tree, shift.to.merkel)
# kanzler.change.tree.c = data.frame(kanzler.change.tree, shift.to.schulz)

# as.factor(kanzler.change.tree.b$shift.to.merkel) -> kanzler.change.tree.b$shift.to.merkel

# as.factor(kanzler.change.tree.c$shift.to.schulz) -> kanzler.change.tree.c$shift.to.schulz


```

```{r}
# tree.shift = tree(shift.to.merkel ~., data=kanzler.change.tree.b[,c(1:666,668)])

# tree.shift2 = tree(shift.to.schulz ~., data=kanzler.change.tree.c[,c(1:666,668)])
```

```{r, eval=TRUE,dpi=300, fig.width=10}
# summary(tree.shift)
# summary(tree.shift2)

# plot(tree.shift)
# text(tree.shift, pretty = 0)

# plot(tree.shift2)
# text(tree.shift2, pretty = 0)

# plot(kanzler.change.tree2$V45, kanzler.change.tree2$V78,pch=19,col=as.numeric(kanzler.change.tree2$shift.to.merkel))
# partition.tree(tree2,label="Shift to Merkel",add=TRUE)

```

```{r, eval=TRUE, fig.height=8, fig.width=11, dpi=300}

library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)




## Classification trees for the three switch-away flags. For every candidate
## one tree uses the combined predictors (pre-debate survey + RTS) and one
## uses the RTS measurements + vor.sieger only.
# NOTE(review): positions 383/384/385 are expected to be victory.change.AB/AL/OS
# in RTS2021b — confirm against the column layout.

## AB: switch away from Baerbock
mytree1 <- rpart(victory.change.AB ~ .,
                 data = RTS2021b[, c(20:42, 57:60, 90:382, 383)],
                 method = "class",
                 control = rpart.control(cp = 0.01))

mytree2 <- rpart(victory.change.AB ~ .,
                 data = RTS2021b[, c(35, 90:382, 383)],
                 method = "class",
                 control = rpart.control(cp = 0.015))

## AL: switch away from Laschet
mytree3 <- rpart(victory.change.AL ~ .,
                 data = RTS2021b[, c(20:42, 57:60, 90:382, 384)],
                 method = "class",
                 control = rpart.control(cp = 0.01))

mytree4 <- rpart(victory.change.AL ~ .,
                 data = RTS2021b[, c(35, 90:382, 384)],
                 method = "class",
                 control = rpart.control(cp = 0.015))

## OS: switch away from Scholz
mytree5 <- rpart(victory.change.OS ~ .,
                 data = RTS2021b[, c(20:42, 57:60, 90:382, 385)],
                 method = "class",
                 control = rpart.control(cp = 0.02))

mytree6 <- rpart(victory.change.OS ~ .,
                 data = RTS2021b[, c(35, 90:382, 385)],
                 method = "class",
                 control = rpart.control(cp = 0.015))


####

# Draw the six trees in the order they were fitted.
fancyRpartPlot(mytree1,
               main = "Shift of victory preference from Baerbock",
               sub = "To Post-RTS preference draw/OS/AL",
               caption = NULL)
fancyRpartPlot(mytree2,
               main = "Shift of victory preference from Baerbock",
               sub = "To Post-RTS preference draw/OS/AL",
               caption = NULL)

fancyRpartPlot(mytree3,
               main = "Shift of victory preference from Laschet",
               sub = "To Post-RTS preference draw/OS/AB",
               caption = NULL)
fancyRpartPlot(mytree4,
               main = "Shift of victory preference from Laschet",
               sub = "To Post-RTS preference draw/OS/AB",
               caption = NULL)

fancyRpartPlot(mytree5,
               main = "Shift of victory preference from Scholz",
               sub = "To Post-RTS preference draw/AL/AB",
               caption = NULL)
fancyRpartPlot(mytree6,
               main = "Shift of victory preference from Scholz",
               sub = "To Post-RTS preference draw/AL/AB",
               caption = NULL)
#fancyRpartPlot(mytree4, main="Shift of chancellor preference to Schulz", sub="From Pre-RTS preference -Neither- or -Angela Merkel-", caption = NULL)
#fancyRpartPlot(mytree2, main="Shift of chancellor preference to Merkel", sub="From Pre-RTS preference -Neither- or -Martin Schulz- w/o dichotome change variable", caption = NULL)
#fancyRpartPlot(mytree3, main="Shift of chancellor preference to Schulz", sub="From Pre-RTS preference -Neither- or -Angela Merkel- w/o dichotome change variable", caption = NULL)

#fancyRpartPlot(mytree5, main="Shift of chancellor preference to Merkel (cp=0.01)", sub="From Pre-RTS preference -Neither- or -Martin Schulz-", caption = NULL)
#fancyRpartPlot(mytree6, main="Shift of chancellor preference to Schulz (cp=0.01)", sub="From Pre-RTS preference -Neither- or -Martin Schulz-", caption = NULL)

# prp(mytree2)
# prp(mytree3)

## Plotmo
# library(plotmo)

# plotmo(mytree2, # type = "prob", nresponse = "No", 
#       graphtype2 = "persp",       # type2 = "image"
#       pt.col = ifelse(kanzler.change.tree2$shift.to.merkel == "No", "red", "green"))


#library(treeheatr)

#janitor::clean_names(kanzler.change.tree2) -> kanzler.change.tree3

#kanzler.change.tree3[1:3500,c(13:107,109)] -> last.test
#na.omit(last.test) -> last.test


#test0001 <- heat_tree(last.test, 
#          task="classification",
#          target_lab="shift_to_merkel")



```

## Shift to Baerbock


*###+#+#+#+++#####++##+#+#+


```{r, eval=TRUE, dpi=300, fig.width=12}
# Cases whose pre-debate expected winner was Scholz or a draw and whose V204
# value is above 1.5. `%in%` tests set membership for every row; the previous
# `== c(...)` recycled the two labels down the rows, so each row was compared
# against only one of them and matching cases were silently dropped.
tree.cases.AB <- dplyr::filter(RTS2021b, vor.sieger %in% c("Olaf Scholz","Unentschieden") & V204>1.5)


#tree.cases2 <- dplyr::filter(kanzler.change.tree.b, vor.kanzler!="Angela Merkel" & V289>1.5 & V361<0.5 & V96<(-0.5))


#tree.cases3 <- dplyr::filter(kanzler.change.tree.b, vor.kanzler!="Angela Merkel" & V289<1.5 & vor.am!=(-2) & vor.am!=(-1) & vor.am!=0 & V88<(-0.5))
                               

#library(lattice)
#heatmap(as.matrix(tree.cases[, 22:666]),Rowv = NA, main="Path 1 - 22 cases")
#heatmap(as.matrix(tree.cases2[, 22:666]),Rowv = NA, main="Path 2 - 7 cases")
#heatmap(as.matrix(tree.cases3[, 22:666]),Rowv = NA, main="Path 2 - 13 cases")
```

## Shift to Baerbock (Cases)

```{r, eval=TRUE}
# Show four columns (selected by position) of the filtered cases.
# NOTE(review): positions 35 and 383 appear to be vor.sieger and
# victory.change.AB, judging by how the models index RTS2021b — confirm;
# positions 43 and 293 are not identified anywhere in this file.
print(tree.cases.AB[,c(35,43,293, 383)])
#print(tree.cases2[, c(117,310,382,667:668)])
#print(tree.cases3[, c(14,109,310,382,667:668)])

```

Man bemerke die relative Häufigkeit, mit der hier ein Swing Richtung AB auftritt.



```{r, fig.height=6, fig.width=10, dpi=300}
# Scatter plots coloured by victory.change.AB, which is 1 for participants who
# expected Baerbock to win before the debate and named someone else (or a
# draw) afterwards — i.e. a switch AWAY from Baerbock.
# alpha is wrapped in I() so that qplot sets it as a constant; a bare 0.8 is
# treated as a mapped aesthetic and adds a meaningless "alpha" legend.
qplot.AB <- qplot(RTS2021b$vor.sieger, RTS2021b$V288, colour = RTS2021b$victory.change.AB, alpha = I(0.8))

# NOTE(review): x is the categorical vor.sieger, so the vertical line at -0.25
# probably falls outside the plotted categories — confirm it is still wanted.
qplot.AB +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.25)) +
  ggtitle("Participants changing victory perception from Annalena Baerbock") +
  xlab("Pre-evaluation: Who will win the debate?") +
  ylab("speaking section V288") +
  labs(colour = "Participant switched from Baerbock\n after debate")

## No2
qplot.AB2 <- qplot(RTS2021b$V256, RTS2021b$V101, colour = RTS2021b$victory.change.AB, alpha = I(0.8))

# Title and legend previously said "to Baerbock", contradicting the plotted
# flag (switch away from Baerbock).
qplot.AB2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.5)) +
  ggtitle("Participants changing victory perception from Annalena Baerbock") +
  xlab("speaking section V256") +
  ylab("speaking section V101") +
  labs(colour = "Participant switched from Baerbock\n after debate")
```

```{r, fig.height=6, fig.width=10, dpi=300}
# Scatter plots coloured by victory.change.AL, which is 1 for participants who
# expected Laschet to win before the debate and named someone else (or a draw)
# afterwards — i.e. a switch AWAY from Laschet. Titles and legends previously
# said "to Laschet", contradicting the plotted flag.
# alpha is wrapped in I() so that qplot sets it as a constant; a bare 0.8 is
# treated as a mapped aesthetic and adds a meaningless "alpha" legend.
qplot.AL <- qplot(RTS2021b$V153, RTS2021b$V157, colour = RTS2021b$victory.change.AL, alpha = I(0.8))

qplot.AL +
  geom_hline(yintercept = 2.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle("Participants changing victory perception from Armin Laschet") +
  xlab("speaking section V153") +
  ylab("speaking section V157") +
  labs(colour = "Participant switched from Laschet\n after debate")

##No2
qplot.AL2 <- qplot(RTS2021b$V288, RTS2021b$V157, colour = RTS2021b$victory.change.AL, alpha = I(0.8))

qplot.AL2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = 0.5) +
  ggtitle("Participants changing victory perception from Armin Laschet") +
  xlab("speaking section V288") +
  ylab("speaking section V157") +
  labs(colour = "Participant switched from Laschet\n after debate")

```

```{r, fig.height=6, fig.width=10, dpi=300}
# Scatter plot coloured by victory.change.OS, which is 1 for participants who
# expected Scholz to win before the debate and named someone else (or a draw)
# afterwards — i.e. a switch AWAY from Scholz. Title and legend previously
# said "to Scholz", contradicting the plotted flag.
# alpha is wrapped in I() so that qplot sets it as a constant; a bare 0.8 is
# treated as a mapped aesthetic and adds a meaningless "alpha" legend.
qplot.OS <- qplot(RTS2021b$V244, RTS2021b$V17, colour = RTS2021b$victory.change.OS, alpha = I(0.8))

qplot.OS +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle("Participants changing victory perception from Olaf Scholz") +
  xlab("speaking section V244") +
  ylab("speaking section V17") +
  labs(colour = "Participant switched from Scholz\n after debate")
```


```{r}
# V204 vs. V171, coloured by the switch-away-from-Scholz flag.
# alpha is wrapped in I() so that qplot sets it as a constant; a bare 0.8 is
# treated as a mapped aesthetic and adds a meaningless "alpha" legend.
qplot.OS2 <- qplot(RTS2021b$V204, RTS2021b$V171, colour = RTS2021b$victory.change.OS, alpha = I(0.8))

# The x axis shows V204; the label previously named V169.
qplot.OS2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.5)) +
  ggtitle("Participants changing victory perception from Olaf Scholz to other candidate") +
  xlab("speaking section V204") +
  ylab("speaking section V171") +
  labs(colour = "Participant switched from Scholz\n after debate")
```
```{r}
# V204 vs. pre-debate party identification, coloured by the
# switch-away-from-Scholz flag.
# alpha is wrapped in I() so that qplot sets it as a constant; a bare 0.8 is
# treated as a mapped aesthetic and adds a meaningless "alpha" legend.
qplot.OS3 <- qplot(RTS2021b$V204, RTS2021b$vor.pid, colour = RTS2021b$victory.change.OS, alpha = I(0.8))

qplot.OS3 +
  geom_vline(xintercept = (-0.5)) +
  ggtitle("Participants changing victory perception from Olaf Scholz to other candidate") +
  xlab("speaking section V204") +
  ylab("pre-party ID") +
  labs(colour = "Participant switched from Scholz\n after debate")
```




---------------------------
#+#+#+#+#+++#+#+#+#+#+#+#+#
-### Resterampe ###--

###
```{r, dpi=300, fig.width=12, eval=FALSE}
# Leftover code (chunk is not evaluated): case subsets along three tree paths.
# NOTE(review): kanzler.change.tree.c is only defined in commented-out code
# elsewhere in this file, so this chunk cannot run as it stands.
tree.cases11 <- dplyr::filter(kanzler.change.tree.c, vor.kanzler=="Keinen von beiden" & vor.ms!=(-2) & vor.ms!=(-1) & V30>1.5)


tree.cases12 <- dplyr::filter(kanzler.change.tree.c, vor.kanzler=="Keinen von beiden" & vor.ms!=(-2) & vor.ms!=(-1) & V30<1.5 & V630>1.5)


# The vor.ms exclusion is written as two separate comparisons, like the two
# filters above. The previous `vor.ms != c("-1","-2")` recycled the two values
# down the rows, so each row was only checked against one of them.
tree.cases13 <- dplyr::filter(kanzler.change.tree.c, vor.kanzler=="Keinen von beiden" & vor.ms!=(-2) & vor.ms!=(-1) & V30<1.5 & V630>1.5 & V330<(-0.5))
                               
# tree.cases2 <- dplyr::filter(tree.cases2, vor.problem!="Anderes" & vor.problem!="Gesundheit" & vor.problem!="Soziale Gerechtigkeit", vor.problem!="Flüchtlinge")

library(lattice)
# One heat map per path over columns 22:666; rows are kept in their original
# order (Rowv = NA).
heatmap(as.matrix(tree.cases11[, 22:666]),Rowv = NA, main="Path 1 - 121 cases")
heatmap(as.matrix(tree.cases12[, 22:666]),Rowv = NA, main="Path 2 - 18 cases")
heatmap(as.matrix(tree.cases13[, 22:666]),Rowv = NA, main="Path 3 - 2 cases")
```

```{r, eval=FALSE}
# Leftover code (chunk is not evaluated): print selected columns, chosen by
# position, of the first two path subsets.
print(tree.cases11[,c(15,51,651,667:668)])
print(tree.cases12[, c(15,51,651,667:668)])

```

###


# Darstellung der Konstellation V289 & V361 bei allen Fällen ohne Vorpräferenz Merkel

```{r, dpi=300, eval=FALSE}
# Leftover code (chunk is not evaluated).
# NOTE(review): kanzler.change.tree.b is only defined in commented-out code
# elsewhere in this file.
library(ggplot2)
# V289 & V361

# All cases whose pre-debate chancellor preference was not "Angela Merkel".
tree.cases3a <- dplyr::filter(kanzler.change.tree.b, vor.kanzler!="Angela Merkel") 

# V289 vs. V361, coloured by shift.to.merkel.
qplot1 <- qplot(tree.cases3a$V289, tree.cases3a$V361, colour = tree.cases3a$shift.to.merkel, alpha=0.8)

# Reference lines at y = 0.5 and x = 1.5 plus title and axis/legend labels.
qplot1 + geom_hline(yintercept=0.5) + geom_vline(xintercept=1.5) + ggtitle("Participants with chancellor pre-preference Schulz or neither") + xlab("speaking section V289") + ylab("speaking section V361") + labs(colour = "Participant switched to Merkel\n after debate")

 
```

Darstellung der Konstellation V30 & V360 bei allen Fällen ohne Vorpräferenz Schulz

```{r, dpi=300, eval=FALSE}
# Leftover code (chunk is not evaluated).
# NOTE(review): kanzler.change.tree.c is only defined in commented-out code
# elsewhere in this file.
# Cases whose pre-debate chancellor preference was "Keinen von beiden".
tree.cases4a <- dplyr::filter(kanzler.change.tree.c, vor.kanzler=="Keinen von beiden")

# vor.ms vs. V30, coloured by shift.to.schulz.
qplot2 <- qplot(tree.cases4a$vor.ms, tree.cases4a$V30, colour = tree.cases4a$shift.to.schulz, alpha=0.8)

qplot2 + geom_hline(yintercept=1.5) + geom_vline(xintercept=(-0.25)) + ggtitle("Participants with chancellor pre-preference neither") + xlab("Pre-evaluation Martin Schulz (vor.ms)") + ylab("speaking section V30") + labs(colour = "Participant switched to Schulz\n after debate")


# Cases whose pre-debate chancellor preference was "Angela Merkel".
tree.cases22 <- dplyr::filter(kanzler.change.tree.c, vor.kanzler=="Angela Merkel")

# V30 vs. V630, coloured by shift.to.schulz; qplot2 is overwritten here.
qplot2 <- qplot(tree.cases22$V30, tree.cases22$V630, colour = tree.cases22$shift.to.schulz, alpha=0.8)

qplot2 + geom_hline(yintercept=1.5) + geom_vline(xintercept=1.5) + ggtitle("Participants with chancellor pre-preference Merkel") + xlab("speaking section V30") + ylab("speaking section V630") + labs(colour = "Participant switched to Schulz\n after debate")

# V78 & V21
#qplot2 <- qplot(kanzler.change.tree2$V78, kanzler.change.tree2$V21, colour = kanzler.change.tree2$shift.to.merkel)

#qplot2 + geom_hline(yintercept=7.5) + geom_vline(xintercept=20.5)

# V45 & V10
#qplot3 <- qplot(kanzler.change.tree2$V45, kanzler.change.tree2$V10, colour = kanzler.change.tree2$kanzler.change)

#qplot3

# V45 & V10 only shift to merkel
#qplot4 <- qplot(V45, V10, data=kanzler.change.tree2[kanzler.change.tree2$kanzler.change>=0.5,], colour = kanzler.change)

#qplot4 + geom_hline(yintercept=1.5) + geom_vline(xintercept=3.5)

# vor.kanzler & V78

#qplot5 <- qplot(vor.kanzler, V78, data=kanzler.change.tree2, colour = kanzler.change)

#qplot5 
```


## Resterampe
### Vergleich mit Vorhersage von nach.kanzler (noch nicht fertig)

```{r}
# vimp1 <- vimp(rf.test.2020.kanzler2)

```

```{r}

# vimp1[["importance"]] -> vimp1data

# as.data.frame(vimp1data) -> vimp1data

# top_n(vimp1data[,2:1], 10) # all
# top_n(vimp1data[,1:2], 10) # Angela Merkel
# top_n(vimp1data[,1:3], 10) # Keiner von beiden
```

### Mit Imputation arbeiten

## Arbeiten mit dem NA count um rauszubekommen wo viele NAs sind

```{r}
## df$na_count <- apply(is.na(df), 1, sum)
```
